# This script reads the spk2utt file and creates a utt2spkid file (written under the name 'label').
# It also records the mapping between speaker name and speaker id in the spk2spkid file.
import os
import sys

def main():
    """Assign integer ids to speakers and label every utterance with one.

    Reads ``spk2utt`` from the directory given as the first command-line
    argument. Each non-blank line is split on whitespace: the first token is
    the speaker name and the remaining tokens are that speaker's utterances.
    Speakers are numbered from 0 in the order their lines appear.

    Two files are written into the same directory:

    * ``label``     -- one ``<utterance> <speaker id>`` line per utterance.
    * ``spk2spkid`` -- one ``<speaker name> <speaker id>`` line per speaker.

    Exits via ``sys.exit`` with a usage message when no directory argument
    is supplied.
    """
    if len(sys.argv) < 2:
        prog = sys.argv[0] if sys.argv else 'script'
        sys.exit('usage: %s <data_dir>' % prog)

    data_dir = sys.argv[1]
    # Parenthesised form prints the same text on Python 2 and also parses
    # on Python 3.
    print(data_dir)

    # os.path.join makes the trailing slash on data_dir optional; plain
    # string concatenation produced e.g. "trainspk2utt" without it.
    spk2utt_path = os.path.join(data_dir, 'spk2utt')
    label_path = os.path.join(data_dir, 'label')
    spk2spkid_path = os.path.join(data_dir, 'spk2spkid')

    # Nested ``with`` blocks guarantee every file is closed even if a line
    # fails to process part-way through.
    with open(spk2utt_path, 'r') as spk2utt_file:
        with open(label_path, 'w') as utt2spkid_file:
            with open(spk2spkid_path, 'w') as spk2spkid_file:
                spk_id = 0
                for line in spk2utt_file:
                    fields = line.split()
                    if not fields:
                        # Blank line: no speaker here, so do not consume
                        # an id for it.
                        continue
                    speaker = fields[0]
                    for utt in fields[1:]:
                        utt2spkid_file.write('%s %d\n' % (utt, spk_id))
                    spk2spkid_file.write('%s %d\n' % (speaker, spk_id))
                    spk_id += 1




# Run the conversion only when this file is executed as a script, so that
# importing it has no side effects.
if __name__ == "__main__":
    main()

